*************************************************************************************
************** Project: Robots and Household Financial Behavior *********************
***************************  Gomes, Jansson, and Karabulut **************************
************************ Labor Income Process Estimation ****************************
****************************** LAST MODIFIED ****************************************
******************************** 2023-06-07 *****************************************
** Stata Settings ***
* Start from an empty session: no data, no programs, no leftover macros.
capture clear all
macro drop _all
clear
* Resource limits. NOTE(review): set mem and set matsize are presumably
* ignored by recent Stata releases, and set maxvar is flavor-dependent;
* confirm against the Stata version used for replication.
set mem 1000m
set matsize 11000
set maxvar 10000
* Never pause output waiting for a key press.
set more off
**Define Paths**
* logs       : folder that receives the .log files
* projf      : project root; output .dta files are written to its data subfolder
* data_final : folder holding the yearly raw CSV extracts
* data       : folder holding the robot data file
global logs "DEFINE THE PATH HERE"
global projf "DEFINE THE PATH HERE"
global data_final "DEFINE THE PATH HERE"
global data "DEFINE THE PATH HERE"

********************************************************************************
* Generate non-financial income variable (at individual level)
********************************************************************************
**Construct the working sample for each year separately**
forvalues i=1993(1)2007 {
    * ------------------------------------------------------------------------
    * One pass per calendar year i. Each pass
    *   1. loads the raw CSV extract for the year,
    *   2. computes individual after-tax income under the year-2000 tax rules,
    *   3. aggregates income to the household and expresses it in 2007 prices,
    *   4. builds self-employment, education and industry variables,
    *   5. merges the robot data by recoded industry, and
    *   6. saves the yearly file that the panel step appends later.
    * ------------------------------------------------------------------------

    **Saving a log file**
    cap log close
    log using "$logs/Income_Proc_Final_RFS_pre_1999_low_`i'.log", replace

    **Upload the dataset (raw data)**
    insheet using "$data_final/incproc230423`i'.csv", comma clear
    *insheet using "$data/incproc210607`i'.csv", comma clear

    **Include inflation series KPI**
    * Each kpiYYYY is the ratio of the 2007 index level (290.51) to the index
    * level of year YYYY, so multiplying a year-YYYY amount by kpiYYYY
    * expresses it in 2007 prices.
    gen kpi1993 = (290.51/243.20)
    gen kpi1994 = (290.51/248.50)
    gen kpi1995 = (290.51/254.80)
    gen kpi1996 = (290.51/256.00)
    gen kpi1997 = (290.51/257.30)
    gen kpi1998 = (290.51/257.00)
    gen kpi1999 = (290.51/258.10)
    gen kpi2000 = (290.51/260.70)
    gen kpi2001 = (290.51/267.10)
    gen kpi2002 = (290.51/272.80)
    gen kpi2003 = (290.51/278.10)
    gen kpi2004 = (290.51/279.20)
    gen kpi2005 = (290.51/280.40)
    gen kpi2006 = (290.51/284.22)
    gen kpi2007 = (290.51/290.51)

    **Calculate after-tax income at the individual level (use the tax scheme of 2000 for all years)**
    * Income is first restated in year-2000 prices so that the year-2000
    * thresholds below apply, and converted back to current-year prices after
    * the tax has been deducted.
    gen pretax_inc = inc_broad*(kpi`i'/kpi2000)
    sum pretax_inc, d
    * Piecewise allowance: capped at 8700 at the bottom, rising by 25 percent of
    * income between 68200 and 105800, flat at 18200 up to 111400, falling by
    * 10 percent of income up to 205400, and 8700 above that.
    * NOTE(review): as coded the schedule jumps from 8700 to 8800 at 68200 and
    * from 18200 to 18100 at 111400; confirm these constants against the
    * intended year-2000 allowance schedule.
    gen tax_allow = min(pretax_inc,8700)
    replace tax_allow = 8800 + 0.25*(pretax_inc-68200) if pretax_inc>=68200 & pretax_inc<105800
    replace tax_allow = 18200 if pretax_inc>=105800 & pretax_inc<111400
    replace tax_allow = 18100 - 0.1*(pretax_inc-111400) if pretax_inc>=111400 & pretax_inc<205400
    replace tax_allow = 8700 if pretax_inc>=205400
    sum tax_allow, d
    gen taxable_inc = pretax_inc - tax_allow
    sum taxable_inc, d
    * Flat 30.38 percent municipal tax plus a state tax of 20 percent above
    * 232600 and a further 5 percent above 374000.
    gen muni_tax = 0.3038*taxable_inc
    gen state_tax = max((taxable_inc-232600)*0.2,0)+max((taxable_inc-374000)*0.05,0)
    * tax_rate is missing when taxable_inc is zero; it is only summarized here.
    gen tax_rate = (muni_tax+state_tax)/taxable_inc
    sum muni_tax state_tax tax_rate, d
    gen aftertax_inc = (pretax_inc-muni_tax-state_tax)*(kpi2000/kpi`i')
    sum aftertax_inc, d

    ****************************************************************************
    * Generate real non-financial income variable at household level
    ****************************************************************************
    *gen broad_inc = aftertax_inc + ctrapsfl
    gen broad_inc = aftertax_inc + pos_transfers
    la var broad_inc "Non-Fin Income"
    **Aggregate the individual income at the household level (only for household head plus spouse if present)**
    * Removes rows flagged child, and non-child rows that have both over18==0
    * and married==0, before summing within idhh.
    drop if child==1|(child==0 & over18==0 & married==0)
    bys idhh: egen broad_inc_hh=total(broad_inc)
    la var broad_inc_hh "Non-Fin Income HH"
    sum broad_inc_hh, d
    ** Winsorize the lower tail of income to 1000 kronor (following Calvet, Campbell, Gomes, and Sodini, 2021) **
    * The floor is applied to the current-price amount, before deflating.
    replace broad_inc_hh=1000 if broad_inc_hh<1000 & mi(broad_inc_hh)==0
    **Real Income in 2007 prices**
    replace broad_inc_hh = (broad_inc_hh * kpi`i') if year==`i'
    la var broad_inc_hh "Real non-Fin Income HH"
    **Calculate the Real Income in Logs**
    gen logrealincome=log(broad_inc_hh)

    **Aggregate self employment income at the household level**
    bys idhh: egen enterpr_inc_hh=total(enterpr_inc)
    la var enterpr_inc_hh "Selfempl Income HH"
    **Self-Employment (defined at the household level)**
    * NOTE(review): a household whose enterprise income sums to a negative
    * number gets a missing value here, not 0 or 1; confirm this is intended.
    gen selfemployed=1 if enterpr_inc_hh>0
    replace selfemployed=0 if enterpr_inc_hh==0
    la var selfemployed "Self-Employed"

    ** Construct Variables for different education levels **
    tab edulev
    keep if edulev>=1 & edulev<=9
    gen college = (edulev >= 4 & edulev<9)
    la var college "College and more"
    gen hschool = (edulev == 3)
    la var hschool "High school"
    gen nohschool = (edulev <= 2)
    la var nohschool "Less than high school"
    * Code 9 is kept in the sample and flagged as missing education.
    gen edu_miss = (edulev==9)
    la var edu_miss "Education missing"

    **Re-Define the SNI codes to match with the Robot data**
    * SNI2_s starts as the string form of sni_2; the listed two-digit codes are
    * then pooled into the grouped labels used as the merge key. Codes that are
    * not listed keep their plain string value.
    tostring sni_2, gen(SNI2_s)
    *Agriculture, Fishery
    replace SNI2_s="01-05" if inlist(sni_2,1,2,3,4,5)
    *Food and beverages; tobacco
    replace SNI2_s="15-16" if inlist(sni_2,15,16)
    *Textiles
    replace SNI2_s="17-18-19" if inlist(sni_2,17,18,19)
    *Wood and Furniture; Paper
    replace SNI2_s="20-21-22" if inlist(sni_2,20,21,22)
    *Pharmaceuticals, other chemical products
    replace SNI2_s="23-24" if inlist(sni_2,23,24)
    *Rubber and plastics; Other Chemical products; glass, ceramics, etc.
    replace SNI2_s="25-26" if inlist(sni_2,25,26)
    *Basic metals, metal products
    replace SNI2_s="27-28" if inlist(sni_2,27,28)
    *Industrial machinery
    replace SNI2_s="29" if (sni_2==29)
    *Electrical/Electronics
    replace SNI2_s="30-31-32-33" if inlist(sni_2,30,31,32,33)
    *Automotive
    replace SNI2_s="34-35" if inlist(sni_2,34,35)
    *Mining
    replace SNI2_s="99998" if inlist(sni_2,10,11,12,13,14)
    *Utilities
    replace SNI2_s="99997" if inlist(sni_2,40,41)
    *Construction
    replace SNI2_s="99996" if (sni_2==45)
    *Education/Research
    replace SNI2_s="99995" if (sni_2==80)
    tab sni_2 SNI2_s

    **Merge the LINDA data with the Robot Data**
    sort SNI2_s
    merge m:1 SNI2_s using "$data/robot_data_2.dta"
    count
    tab _merge
    * Rows that exist only in the robot file (industries with nobody in this
    * year) carry no person, year or income information. They are shown in the
    * tabulation above for diagnostics and then removed so they are not saved
    * into the yearly person-level file.
    drop if _merge==2
    drop _merge

    ****************************************************************************
    * Save the Data in Stata Format
    ****************************************************************************
    sort id
    save "$projf/data/incprocess_final_RFS_`i'.dta", replace
    **Close the log-file
    cap log close
}
	
********************************************************************************
* Construct a Panel Dataset between 1993-2007
********************************************************************************
**Saving a log file**
capture log close
log using "$logs/Construct_panel_dataset_final_RFS.log", replace

** Upload the Yearly Data **
* Start from the 1993 file and stack the 1994-2007 files underneath it.
use "$projf/data/incprocess_final_RFS_1993.dta", clear
forvalues yr = 1994/2007 {
    append using "$projf/data/incprocess_final_RFS_`yr'.dta", force
}

** Robot Exposure (based on 1993 industry for pre, 1999 industry for post) **
* The score is 1, 0.5 or 0 for robot terciles 1, 2 and 3, and is defined only
* in the base year for employed, non-self-employed persons aged 20 to 64.
* Averaging within id copies that base-year score to every year of the person.
local wage_earner "employed==1 & selfemployed==0 & age>=20 & age<=64"
foreach tag in pre post {
    local base_year = cond("`tag'"=="pre", 1993, 1999)
    generate low_rob_exp_`tag' = 1 if (tercile_robot==1) & year==`base_year' & `wage_earner'
    replace low_rob_exp_`tag' = 0.5 if (tercile_robot==2) & year==`base_year' & `wage_earner'
    replace low_rob_exp_`tag' = 0 if (tercile_robot==3) & year==`base_year' & `wage_earner'
    bysort id: egen mean_low_rob_exp_`tag' = mean(low_rob_exp_`tag')
    drop low_rob_exp_`tag'
    tabulate mean_low_rob_exp_`tag'
}

** Keep maximum of 2 individuals per households **
* Household-years with more than two rows are removed entirely.
bysort idhh year: generate max_obs_year = _N
summarize max_obs_year, detail
drop if max_obs_year > 2
drop max_obs_year

** Keep only the household head **
keep if head9307==1

** Keep only if minimum income exceeds SEK 10,000 **
* The minimum is taken over all years of the household, so one low year
* removes the whole household.
bysort idhh: egen min_inc = min(broad_inc_hh)
drop if min_inc < 10000

** Winsorize the upper tail of income if income is above the 99.99 percentile (following Calvet, Campbell, Gomes, and Sodini, 2021) **
* Only the log income variable is winsorized; broad_inc_hh is left as is.
* NOTE(review): winsor2 is presumably a community-contributed command that has
* to be installed separately; confirm it is available before running.
winsor2 logrealincome, cuts(0 99.99) replace

*Keep only if at least 3 observations *
* The within-household counters stay in the saved file.
bysort idhh: generate order_obs_new = _n
bysort idhh: egen max_obs_new = max(order_obs_new)
bysort idhh: egen min_obs_new = min(order_obs_new)
summarize min_obs_new max_obs_new, detail
keep if max_obs_new >= 3

** Rename the household ID variable **
rename idhh pid

** Generate robot exposure and time period indicator variables **
* All flags below are coded 1 or missing, never 0.
foreach tag in pre post {
    generate mean_med_rob_exp_`tag' = 1 if mean_low_rob_exp_`tag' == .5
    generate mean_high_rob_exp_`tag' = 1 if mean_low_rob_exp_`tag' == 0
}
generate pre_period = 1 if inrange(year, 1993, 1998)
generate pre_period_adj = 1 if inrange(year, 1994, 1998)
generate pre_period_longer = 1 if inrange(year, 1993, 2001)
generate post_period = 1 if inrange(year, 1999, 2007)
generate post_period_adj = 1 if inrange(year, 2000, 2007)
* NOTE(review): post_period_longer covers the same years as post_period.
generate post_period_longer = 1 if inrange(year, 1999, 2007)

** Save the Panel Data in Stata Format **
sort id year
save "$projf/data/incprocess_panel_final_RFS.dta", replace
capture log close

*cf _all using "$projf/data/incprocess_panel_final_RFS_tidigare.dta", verbose

*use "$projf/data/incprocess_panel_final_RFS_tidigare.dta", clear
*cf _all using "$projf/data/incprocess_panel_final_RFS.dta", verbose

******************************************************************************************************
******************************************************************************************************

